In [1]:
import plotly.tools as tls
# Embed the Plotly figure hosted at this URL in the cell output.
# NOTE(review): newer plotly releases appear to ship `embed` in the separate
# chart_studio package - confirm against the installed plotly version.
tls.embed('https://plot.ly/~chris/7365')
Out[1]:
In [2]:
import pandas as pd
from sqlalchemy import create_engine #database connection
import datetime as dt
from IPython.display import display
import plotly.plotly as py #interactive graphing
from plotly.graph_objs import Bar, Scatter, Marker, Layout
In [3]:
import zipfile
import requests
import io
import random
import seaborn as sns
import numpy as np
from datetime import datetime
%matplotlib inline
In [ ]:
In [6]:
# Download the June 2016 Citi Bike trip archive and load its CSV into `trip_data`.
# The timeout bounds each connect/read wait so a stalled server cannot hang the kernel.
r = requests.get('https://s3.amazonaws.com/tripdata/201606-citibike-tripdata.zip', timeout=60)
# Stop here on an HTTP error instead of letting ZipFile fail on an error page.
r.raise_for_status()
# The archive is unpacked in memory; nothing is written to disk.
with zipfile.ZipFile(io.BytesIO(r.content)) as ar:
    # Open the member in its own `with` so the handle is closed after parsing.
    with ar.open('201606-citibike-tripdata.csv') as csv_file:
        trip_data = pd.read_csv(csv_file)
In [4]:
In [7]:
# Preview the first five rows (five is the default for head()).
trip_data.head()
Out[7]:
In [8]:
# Number of distinct bike ids; dropna=False counts a missing id as one value,
# matching what len(unique()) reports.
trip_data['bikeid'].nunique(dropna=False)
Out[8]:
In [9]:
# Number of distinct start station names; dropna=False counts a missing name
# as one value, matching what len(unique()) reports.
trip_data['start station name'].nunique(dropna=False)
Out[9]:
In [10]:
# Summary statistics for the numeric columns of the full trip table.
trip_data.describe()
Out[10]:
In [11]:
# Summary statistics restricted to rows whose gender code is above zero.
# NOTE(review): presumably 0 encodes "unknown" gender - confirm against the data dictionary.
trip_data.loc[trip_data['gender'].gt(0)].describe()
Out[11]:
In [14]:
# Parse the start/stop timestamp strings into datetime64 columns.
# pd.to_datetime converts each column in one vectorized call (the per-row
# strptime lambda was slow on a month of trips) and passes already-converted
# datetime64 columns through, so this cell can be re-run safely.
trip_data['starttime'] = pd.to_datetime(trip_data['starttime'], format="%m/%d/%Y %H:%M:%S")
trip_data['stoptime'] = pd.to_datetime(trip_data['stoptime'], format="%m/%d/%Y %H:%M:%S")
In [15]:
# Show the dtype of every column (e.g. to check starttime/stoptime are datetime64).
trip_data.dtypes
Out[15]:
In [17]:
# Draw ten distinct bike ids to follow. A dedicated, seeded generator keeps the
# sample identical across Restart & Run All without touching the global
# `random` module state.
random_bikes = random.Random(42).sample(list(trip_data['bikeid'].unique()), 10)
In [18]:
# Collect, for each of the first seven sampled bikes, the trips that overlap
# the week starting on day 6 of the month (zip stops at the shorter sequence,
# so only seven of the sampled bikes are used).
# NOTE: the trip file loaded in this notebook is the 2016-06 archive, so the
# window is placed in June; a March window could never match a row.
weekly_trips = []
for day, bike in zip([6] * 7, random_bikes):
    window_start = datetime(2016, 6, day)
    window_end = datetime(2016, 6, day + 7)
    # Each comparison is parenthesised because `&` binds tighter than `<`/`>=`/`==`.
    selected_trips = trip_data[
        (trip_data['starttime'] < window_end)
        & (trip_data['stoptime'] >= window_start)
        & (trip_data['bikeid'] == bike)
    ]
    weekly_trips.append(selected_trips)

# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing the frame inside the loop. Fall back to an empty, correctly typed
# frame when there is nothing to concatenate.
sample_trips = pd.concat(weekly_trips) if weekly_trips else trip_data.iloc[:0]
In [ ]:
# Render the sampled trips with the notebook's rich (HTML) table display
# rather than the plain-text repr that print() produces.
display(sample_trips)
In [ ]: